library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ─────────────────────────────────────────────────────────────────────── tidyverse 1.3.2 ──✔ ggplot2 3.3.6 ✔ purrr 0.3.5
✔ tibble 3.1.8 ✔ dplyr 1.0.10
✔ tidyr 1.2.1 ✔ stringr 1.4.1
✔ readr 2.1.3 ✔ forcats 0.5.2 ── Conflicts ────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
#for general coding
library(janitor)
Attaching package: ‘janitor’
The following objects are masked from ‘package:stats’:
chisq.test, fisher.test
#for cleaning
library(lubridate)
Attaching package: ‘lubridate’
The following objects are masked from ‘package:base’:
date, intersect, setdiff, union
#for working with dates
library(readxl)
#for working with excel files
library(fuzzyjoin)
#for working with imperfect strings
library(reshape2)
Attaching package: ‘reshape2’
The following object is masked from ‘package:tidyr’:
smiths
These are files we scraped from the county clerk’s office with information on all codes we pulled for
#Misdemeanors
dwi_mis_raw <- read_csv("Data/scraped/bexar-misds-DWI-20191001-20191130.csv") %>%
mutate(`OFFENSE-CODE` = as.character(`OFFENSE-CODE`)) %>%
mutate(`CASE-CAUSE-NBR` = as.character(`CASE-CAUSE-NBR`)) %>%
mutate(`ADDR-HOUSE-NBR` = as.character(`ADDR-HOUSE-NBR`)) %>%
mutate(`SENTENCE` = as.character(`SENTENCE`)) %>%
mutate(class = "misdemeanor")
New names:Warning: One or more parsing issues, call `problems()` on your data frame for details, e.g.:
dat <- vroom(...)
problems(dat)Rows: 78696 Columns: 68── Column specification ────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (51): FULL-NAME, ALIAS, SEX, RACE, BIRTHDATE, HOUSE-SUF, ADDR-PRE-DIRECTION, ADDR-STREET, ADDR-STREET-S...
dbl (15): ...1, CASE-CAUSE-NBR, ADDR-HOUSE-NBR, ADDR-ZIP-CODE, OFFENSE-CODE, REDUCED-OFFENSE-CODE, COMPLAIN...
lgl (1): Unnamed: 64
date (1): offense_date_clean
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Felonies
dwi_fel_raw <- read_csv("Data/scraped/bexar-felony-DWI-20191001-20191130.csv") %>%
mutate(`OFFENSE-CODE` = as.character(`OFFENSE-CODE`)) %>%
mutate(`CASE-CAUSE-NBR` = as.character(`CASE-CAUSE-NBR`)) %>%
mutate(`ADDR-HOUSE-NBR` = as.character(`ADDR-HOUSE-NBR`)) %>%
mutate(`SENTENCE` = as.character(`SENTENCE`)) %>%
mutate(class = "felony")
New names:Rows: 8593 Columns: 68── Column specification ────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (52): CASE-CAUSE-NBR, FULL-NAME, ALIAS, SEX, RACE, BIRTHDATE, ADDR-HOUSE-NBR, HOUSE-SUF, ADDR-PRE-DIREC...
dbl (14): ...1, SID, JUDICIAL-NBR, ADDR-ZIP-CODE, OFFENSE-CODE, REDUCED-OFFENSE-CODE, COMPLAINT-DATE, DISPO...
lgl (1): Unnamed: 64
date (1): offense_date_clean
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Together
dwi_raw <- dwi_mis_raw %>%
full_join(dwi_fel_raw)
Joining, by = c("...1", "CASE-CAUSE-NBR", "FULL-NAME", "ALIAS", "SEX", "RACE", "BIRTHDATE", "ADDR-HOUSE-NBR", "HOUSE-SUF", "ADDR-PRE-DIRECTION", "ADDR-STREET", "ADDR-STREET-SUFFIX", "ADDR-POST-DIRECTION", "ADDR-UNIT", "ADDR-CITY", "ADDR-STATE", "ADDR-ZIP-CODE", "ADDR-ZIP-PLUS-4", "OFFENSE-DATE", "OFFENSE-CODE", "OFFENSE-DESC", "OFFENSE-TYPE", "REDUCED-OFFENSE-CODE", "REDUCED-OFFENSE-DESC", "REDUCED-OFFENSE-TYPE", "LOCATION", "CUSTODY-DATE", "COMPLAINT-DATE", "FILING-AGENCY-DESCRIPTION", "CASE-DATE", "CASE-DESC", "SETTING-DATE", "SETTING-TYPE", "G-JURY-DATE", "G-JURY-STATUS", "DISPOSITION-DATE", "DISPOSITION-CODE", "DISPOSITION-DESC", "JUDGEMENT-DATE", "JUDGEMENT-CODE", "JUDGEMENT-DESC", "SENTENCE-DESC", "SENTENCE", "SENTENCE-START-DATE", "SENTENCE-END-DATE", "FINE-AMOUNT", "COURT-COSTS", "COURT-TYPE", "COURT", "POST-JUDICIAL-FIELD", "POST-JUDICIAL-DATE", "BOND-DATE", "BOND-STATUS", "BOND-AMOUNT", "BONDSMAN-NAME", "ATTORNEY", "ATTORNEY-BAR-NBR", "ATTORNEY-APPOINTED-RETAINED", "INTAKE-PROSECUTOR", "OUTTAKE-PROSECUTOR", "PROBATION-PROSECUTOR", "REVOKATION-PROSECUTOR", "ORIGINAL-SENTENCE", "SID", "JUDICIAL-NBR", "Unnamed: 64", "offense_desc_clean", "offense_date_clean", "class")
official_dwi_codes <- read_excel("Data/references_to_import/DWI-related offense codes from excel.xlsx", sheet = 1) %>%
clean_names() %>%
mutate(county_offense_code = as.character(county_offense_code))
Quick look at what we have
dwi_raw_codes <- dwi_raw %>%
group_by(`OFFENSE-CODE`) %>%
tally() %>%
arrange(`OFFENSE-CODE`)
dwi_raw_codes
head(dwi_raw)
check_dwi <- official_dwi_codes %>%
anti_join(dwi_raw, by = c("county_offense_code" = "OFFENSE-CODE"))
check_dwi
clean_dwi_anti <- dwi_raw %>%
anti_join(official_dwi_codes, by = c("OFFENSE-CODE" = "county_offense_code")) %>%
group_by(`OFFENSE-DESC`) %>%
tally()
clean_dwi_anti
clean_dwi_pre <- dwi_raw %>%
inner_join(official_dwi_codes, by = c("OFFENSE-CODE" = "county_offense_code")) %>%
mutate("BIRTHDATE" = mdy(`BIRTHDATE`)) %>%
mutate("OFFENSE-DATE" = mdy(`OFFENSE-DATE`)) %>%
mutate("CUSTODY-DATE" = mdy(`CUSTODY-DATE`)) %>%
mutate("COMPLAINT-DATE" = ymd(`COMPLAINT-DATE`)) %>%
mutate("CASE-DATE" = mdy(`CASE-DATE`)) %>%
mutate("SETTING-DATE" = mdy(`SETTING-DATE`)) %>%
mutate("G-JURY-DATE" = mdy(`G-JURY-DATE`)) %>%
mutate("DISPOSITION-DATE" = mdy(`DISPOSITION-DATE`)) %>%
mutate("JUDGEMENT-DATE" = mdy(`JUDGEMENT-DATE`)) %>%
mutate("SENTENCE-START-DATE" = mdy(`SENTENCE-START-DATE`)) %>%
mutate("SENTENCE-END-DATE" = mdy(`SENTENCE-END-DATE`)) %>%
mutate("POST-JUDICIAL-DATE" = mdy(`POST-JUDICIAL-DATE`)) %>%
mutate("BOND-DATE" = mdy(`BOND-DATE`)) %>%
mutate("original_sentence_years" = substr(`ORIGINAL-SENTENCE`, 1, 3)) %>%
mutate("original_sentence_months" = substr(`ORIGINAL-SENTENCE`, 6, 7)) %>%
mutate("original_sentence_days" = substr(`ORIGINAL-SENTENCE`, 11, 12)) %>%
mutate("original_sentence_hours" = substr(`ORIGINAL-SENTENCE`, 16, 18)) %>%
mutate("offense_date_clean" = ymd(`offense_date_clean`)) %>%
mutate("FULL-NAME" = str_to_title(`FULL-NAME`)) %>%
mutate("offense_year" = floor_date(`OFFENSE-DATE`, unit = "year")) %>%
mutate("judgement_year" = floor_date(`JUDGEMENT-DATE`, unit = "year")) %>%
## May 10: adding this in so we can sort by DA
mutate(da_in_power = case_when(
judgement_year >= as.Date("2009-01-01") & judgement_year <= as.Date("2014-01-01") ~ "Susan Reed",
judgement_year >= as.Date("2015-01-01") & judgement_year <= as.Date("2018-01-01") ~ "Nico Lahood",
judgement_year >= as.Date("2019-01-01") ~ "Joe Gonzales",
is.na(judgement_year) & offense_year >= as.Date("2009-01-01") & offense_year <= as.Date("2014-01-01") ~ paste("Susan Reed BY OFFENSE YEAR", offense_year, sep = " "),
is.na(judgement_year) & offense_year >= as.Date("2015-01-01") & offense_year <= as.Date("2018-01-01") ~ paste("Nico Lahood BY OFFENSE YEAR", offense_year, sep = " "),
is.na(judgement_year) & offense_year >= as.Date("2019-01-01") ~ paste("Joe Gonzales BY OFFENSE YEAR", offense_year, sep = " "),
)) %>%
#select(-judgement_year) %>%
clean_names() %>%
select(-x1) %>%
distinct()
clean_dwi_pre
EDIT NOV. 4, 2022
What are the judgement descriptions here
judge <- clean_dwi_pre %>%
group_by(judgement_desc) %>%
tally()
judge
This clean_dwi_pre file includes the “obstruction passageway” cases that are not alcohol-related. We will remove that in a later step
clean_dwi_offense_group <- clean_dwi_pre %>%
group_by(offense_description, reduced_offense_desc) %>%
tally()
#filter(grepl("OBSTRUCT", paste(offense_description, reduced_offense_desc)))
clean_dwi_offense_group
clean_dwi_offense_group_no_reduce <- clean_dwi_pre %>%
group_by(offense_description) %>%
tally()
#filter(grepl("OBSTRUCT", paste(offense_description, reduced_offense_desc)))
clean_dwi_offense_group_no_reduce
Brian always asks for the maximum date in our dataset so I need to remember that this is it:
max(clean_dwi_pre$offense_date)
min(clean_dwi_pre$offense_date)
Case details are information we pulled from each person’s detailed case page
scraped_april_1 <- read_csv("Data/my_exports/April17/dwi.csv") %>%
filter(eventDescription != "Timeout error")
## June 6th update -- some URLs weren't scraped so here's what we found. Still some people aren't on the site.
scraped_april_2 <- read_csv("Data/scraped/ryan/fixedURLs.csv")
scraped_april_3 <- read_csv("Data/scraped/ryan/timeouts.csv")
scraped_april <- rbind(scraped_april_1, scraped_april_2, scraped_april_3)
check <- scraped_april %>%
group_by(offense_description, reduced_offense_desc) %>%
tally()
check
Here, I’m making sure the data processes as I want it to be. Particularly – that dates are processed as dates
clean_scraped_prep <- scraped_april %>%
#mutate(birthDate = mdy(birthDate)) %>%
select(-birthDate) %>%
mutate(eventDate = mdy(eventDate)) %>%
mutate(case_cause_nbr = as.character(case_cause_nbr)) %>%
filter(is.na(reduced_offense_desc) | reduced_offense_desc != "DRIVING WHILE INTOXICATED") %>%
filter(is.na(reduced_offense_desc) | reduced_offense_desc != "DWI W/BAC 0.15 OR HIGHER")
#mutate(case_cause_nbr = if_else(case_cause_nbr == "0", "2019CR5617W", case_cause_nbr))
clean_scraped_prep
How many people are in this dataset?
clean_scraped_prep_ppl <- clean_scraped_prep %>%
group_by(sid) %>%
tally()
clean_scraped_prep_ppl
There are 19,714 rows, thus 19,714 people who have been charged with obstruction between Jan. 1, 2009 and Jan. 13, 2022
clean_scraped_prep_ppl_case <- clean_scraped_prep %>%
group_by(case_cause_nbr) %>%
tally()
clean_scraped_prep_ppl_case
There are 19,774 rows, thus 19,774 cases of obstruction between Jan. 1, 2009 and Jan. 13, 2022
This is a person who must be in here. Let’s double check their existence
hilario <- clean_scraped_prep %>%
filter(sid == 1087579)
hilario
They exist
Just to review:
full_name = the person’s name
url = the source where the data is from
birthDate = the person’s birthday — I think there is a small problem with this field and it got messed up after the scraping incident, but I’m not worried about it
sex = sex
race = race/ethnicity – it’s a weird column, not reliable, but gives a general idea of the individual’s race
offense_description = description of offense
reduced_offense_description = if applicable, the description of the reduced offense
court = court it was in
case_cause_nbr = case number
sid = a unique number for an individual
character_cnt = ignore, filed used to help me generate the url
eventDate = so we are pulling information from the table, located on the url. That table has all of the docket related events. This is the date of the docket-related event
eventDescription = so we are pulling information from the table, located on the url. That table has all of the docket related events. This is the description of the docket-related event
Data to look for
clean_scrape_prep = dataset that includes EVERY obstruction charge and the docket lines associated with the person and that charge clean_scrape = the work I do below to only look at DWI info in the obstruction charges
The Charges
And we are looking at obstruction-related charges. Here are those specific charges and the number of times they occur (column n)
count_charges <- clean_scraped_prep %>%
select(sid, offense_description, reduced_offense_desc) %>%
distinct() %>%
group_by(offense_description, reduced_offense_desc) %>%
tally() %>%
arrange(desc(n))
count_charges
Now I want to know how many people are in this dataset. The “n” column is the number of times they appear in my dataset of just “DWI-related” docket events. We can ignore that.
people_in_here <- clean_scraped %>%
group_by(sid) %>%
tally()
people_in_here
Answer: of the 19,714 who had an obstruction charge, about 19,444 had some sort of DWI indication. I know this because there are 19,444 rows
people_case_in_here <- clean_scraped %>%
group_by(sid, case_cause_nbr) %>%
#group_by(sid) %>%
tally()
people_case_in_here
And of the 19,774 cases, 19,492 of them were related to drinking.
Now I want to look at people who only have this charge and see if they have alcohol-related stuff
So we go to the DWI docket event data, and say – give me only those with the passageway/roadway charge
Now let’s count the people
passageway <- clean_scraped %>%
filter(grepl("OBSTRUCT PASSAGEWAY/ROADWAY", offense_description) | grepl("OBSTRUCT PASSAGEWAY/ROADWAY", reduced_offense_desc)) %>%
filter(is.na(reduced_offense_desc) | reduced_offense_desc != "OBSTRUCT HIGHWAY-INTOXICATION") %>%
arrange(sid)
passageway
Quick docket check for Allie
write_csv(passageway, "Data/my_exports/allie_check_passageway.csv")
passageway_check_docket <- passageway %>%
group_by(eventDescription) %>%
tally()
passageway_check_docket
Let me double check I’m looking at the charges I want to look at.
passageway_offense_desc <- passageway %>%
select(sid, offense_description, reduced_offense_desc) %>%
distinct() %>%
group_by(offense_description, reduced_offense_desc) %>%
tally() %>%
arrange(desc(n))
passageway_offense_desc
And now let’s count the people with the charge
passageway_ppl <- passageway %>%
group_by(sid) %>%
tally()
passageway_ppl
NA
There are 3,562 people that have an indication of a DWI in the dataset.
passageway_case <- passageway %>%
group_by(case_cause_nbr) %>%
tally() %>%
arrange(-n)
passageway_case
There are 3,563 cases that have an indication of a DWI in the dataset with 3,562 people
But I know the main criteria we want to look for is a BAC above 0.15 or higher. So how many of these people had a BAC of 0.15 or higher?
All the BAC events:
pass_BAC_events <- passageway %>%
filter(grepl(paste(bac_specific_official_dwi_events, collapse="|)"), eventDescription))
pass_BAC_events
All the BAC people:
people_pass_BAC <- pass_BAC_events %>%
group_by(sid) %>%
tally()
people_pass_BAC
There are 1,385 people with a BAC of 0.15 or higher.
case_pass_BAC <- pass_BAC_events %>%
group_by(case_cause_nbr) %>%
tally()
case_pass_BAC
There are 1,385 cases with a BAC of 0.15 or higher.
But you are probably wondering – well how many people had this passageway roadway charge?
Let’s look at the dataset before we filtered for DWI people
count_all_passageway <- clean_scraped_prep %>%
filter(grepl("OBSTRUCT PASSAGEWAY/ROADWAY", offense_description) | grepl("OBSTRUCT PASSAGEWAY/ROADWAY", reduced_offense_desc)) %>%
filter(is.na(reduced_offense_desc) | reduced_offense_desc != "OBSTRUCT HIGHWAY-INTOXICATION") %>%
group_by(sid) %>%
#group_by(sid, case_cause_nbr) %>%
tally()
count_all_passageway
Final Finding: Of the 3,837 cases with obstruction of passageway charge between Jan. 1, 2009 and Jan. 13, 2022, 3,562 of them have been associated with a drinking above the legal limit. 1,385 people had a blood alcohol content above 0.15.
Okay, let’s look at people with the obstruction highway-intoxication charge.
First all of the docket events with this charge:
highway_intox <- clean_scraped %>%
filter(grepl("OBSTRUCT HIGHWAY-INTOX", offense_description) | grepl("OBSTRUCT HIGHWAY-INTOX", reduced_offense_desc)) %>%
filter(is.na(reduced_offense_desc) | reduced_offense_desc != "OBSTRUCT PASSAGEWAY/ROADWAY/WA")
highway_intox
Just to prove to myself that I am in fact looking at people with these only these charges, let’s double check:
highway_intox_offense_desc <- highway_intox %>%
select(sid, offense_description, reduced_offense_desc) %>%
distinct() %>%
group_by(offense_description, reduced_offense_desc) %>%
tally() %>%
arrange(desc(n))
highway_intox_offense_desc
Now the number of people with this charge
case_in_here_highway_intox <- highway_intox %>%
group_by(case_cause_nbr) %>%
#group_by(sid) %>%
tally()
case_in_here_highway_intox
people_in_here_highway_intox <- highway_intox %>%
group_by(sid) %>%
tally()
people_in_here_highway_intox
So, there are 15,929 cases charged with (or had their charge reduced to) “OBSTRUCT HIGHWAY-INTOXICATION.” 15,891 people
But I know the main criteria we want to look for is a BAC above 0.15 or higher. So how many of these people had a BAC of 0.15 or higher?
All the BAC events:
highway_intox_BAC <- highway_intox %>%
filter(grepl(paste(bac_specific_official_dwi_events, collapse="|)"), eventDescription))
highway_intox_BAC
All the BAC people:
case_highway_intox_BAC <- highway_intox_BAC %>%
group_by(case_cause_nbr) %>%
tally()
case_highway_intox_BAC
3,564 cases of BAC 0.15 in highway intoxication
people_highway_intox_BAC <- highway_intox_BAC %>%
group_by(sid) %>%
tally()
people_highway_intox_BAC
So there are 3,564 cases who had a BAC above 0.15 3,558 people
How many people in total had a highway intoxication charge?
count_all_highway_intox <- clean_scraped_prep %>%
filter(grepl("OBSTRUCT HIGHWAY-INTOX", offense_description) | grepl("OBSTRUCT HIGHWAY-INTOX", reduced_offense_desc)) %>%
filter(is.na(reduced_offense_desc) | reduced_offense_desc != "OBSTRUCT PASSAGEWAY/ROADWAY/WA") %>%
group_by(case_cause_nbr) %>%
tally()
count_all_highway_intox
Final Finding: Of the 15,929 cases with an obstruction highway intoxication charge between Jan. 1, 2009 and Jan. 13, 2022, 15,929 of them have been associated with a drinking above the legal limit. 3,564 of the cases had a docket line indicating that their BAC was above 0.15.
bac_master <- clean_scraped %>%
filter(grepl(paste(bac_specific_official_dwi_events, collapse="|)"), eventDescription))
bac_ppl <- bac_master %>%
group_by(sid, case_cause_nbr) %>%
tally() %>%
arrange(desc(n)) %>%
group_by(sid) %>%
tally() %>%
arrange(desc(n))
bac_ppl
docket_976809 <- clean_scraped %>%
filter(sid == 1099456)
docket_976809
1036672
dwi_master <- clean_scraped %>%
filter(grepl(paste(official_dwi_events, collapse="|)"), eventDescription))
dwi_ppl <- dwi_master %>%
group_by(sid, case_cause_nbr) %>%
tally() %>%
arrange(desc(n)) %>%
group_by(sid) %>%
tally() %>%
arrange(desc(n))
dwi_ppl
oct27 <- clean_scraped %>%
filter(eventDate > as.Date("2020-10-27"))
oct27
oct27_ppl <- oct27 %>%
group_by(case_cause_nbr) %>%
tally()
oct27_ppl
490 people
oct27_BAC_events <- oct27 %>%
filter(grepl(paste(bac_specific_official_dwi_events, collapse="|)"), eventDescription))
oct27_BAC_events
oct27_BAC_ppl <- oct27_BAC_events %>%
group_by(case_cause_nbr) %>%
tally()
oct27_BAC_ppl
She wants the following:
Codes to Pull: 540412 – Driving While Intoxicated -3D/M 540405 – Driving While Intoxicated 3rd 540418 – DWI w/ prev intox mansl convict 229999 – DWI Subsequent Offense 499999 – DWI – 3rd Accident 90901 – Intoxication Manslaughter 90902 – Intox Manslaughter Publ Serv
emilie_pull <- clean_individual_by_charges %>%
filter(grepl("540412", paste(offense_description_offense_1.0, reduced_offense_desc_offense_1.0, offense_description_offense_1.1, reduced_offense_desc_offense_1.1, offense_description_offense_1.2, reduced_offense_desc_offense_1.2, offense_description_offense_1.3, reduced_offense_desc_offense_1.3, offense_description_offense_1.4, reduced_offense_desc_offense_1.4, offense_description_offense_1.5, reduced_offense_desc_offense_1.5, offense_description_offense_2.0, reduced_offense_desc_offense_2.0, offense_description_offense_2.1, reduced_offense_desc_offense_2.1, offense_description_offense_2.2, reduced_offense_desc_offense_2.2, offense_description_offense_2.3, reduced_offense_desc_offense_2.3, offense_description_offense_3.0, reduced_offense_desc_offense_3.0, offense_description_offense_3.1, reduced_offense_desc_offense_3.1, offense_description_offense_3.2, reduced_offense_desc_offense_3.2, offense_description_offense_3.3, reduced_offense_desc_offense_3.3, offense_description_offense_3.4, reduced_offense_desc_offense_3.4,offense_description_offense_4.0, reduced_offense_desc_offense_4.0, offense_description_offense_4.1, reduced_offense_desc_offense_4.1,offense_description_offense_4.2, reduced_offense_desc_offense_4.2, offense_description_offense_5.0, reduced_offense_desc_offense_5.0, offense_description_offense_6.0, reduced_offense_desc_offense_6.0, offense_description_offense_7.0, reduced_offense_desc_offense_7.0))
| grepl("540405", paste(offense_description_offense_1.0, reduced_offense_desc_offense_1.0, offense_description_offense_1.1, reduced_offense_desc_offense_1.1, offense_description_offense_1.2, reduced_offense_desc_offense_1.2, offense_description_offense_1.3, reduced_offense_desc_offense_1.3, offense_description_offense_1.4, reduced_offense_desc_offense_1.4, offense_description_offense_1.5, reduced_offense_desc_offense_1.5, offense_description_offense_2.0, reduced_offense_desc_offense_2.0, offense_description_offense_2.1, reduced_offense_desc_offense_2.1, offense_description_offense_2.2, reduced_offense_desc_offense_2.2, offense_description_offense_2.3, reduced_offense_desc_offense_2.3, offense_description_offense_3.0, reduced_offense_desc_offense_3.0, offense_description_offense_3.1, reduced_offense_desc_offense_3.1, offense_description_offense_3.2, reduced_offense_desc_offense_3.2, offense_description_offense_3.3, reduced_offense_desc_offense_3.3, offense_description_offense_3.4, reduced_offense_desc_offense_3.4,offense_description_offense_4.0, reduced_offense_desc_offense_4.0, offense_description_offense_4.1, reduced_offense_desc_offense_4.1,offense_description_offense_4.2, reduced_offense_desc_offense_4.2, offense_description_offense_5.0, reduced_offense_desc_offense_5.0, offense_description_offense_6.0, reduced_offense_desc_offense_6.0, offense_description_offense_7.0, reduced_offense_desc_offense_7.0))
| grepl("540418", paste(offense_description_offense_1.0, reduced_offense_desc_offense_1.0, offense_description_offense_1.1, reduced_offense_desc_offense_1.1, offense_description_offense_1.2, reduced_offense_desc_offense_1.2, offense_description_offense_1.3, reduced_offense_desc_offense_1.3, offense_description_offense_1.4, reduced_offense_desc_offense_1.4, offense_description_offense_1.5, reduced_offense_desc_offense_1.5, offense_description_offense_2.0, reduced_offense_desc_offense_2.0, offense_description_offense_2.1, reduced_offense_desc_offense_2.1, offense_description_offense_2.2, reduced_offense_desc_offense_2.2, offense_description_offense_2.3, reduced_offense_desc_offense_2.3, offense_description_offense_3.0, reduced_offense_desc_offense_3.0, offense_description_offense_3.1, reduced_offense_desc_offense_3.1, offense_description_offense_3.2, reduced_offense_desc_offense_3.2, offense_description_offense_3.3, reduced_offense_desc_offense_3.3, offense_description_offense_3.4, reduced_offense_desc_offense_3.4,offense_description_offense_4.0, reduced_offense_desc_offense_4.0, offense_description_offense_4.1, reduced_offense_desc_offense_4.1,offense_description_offense_4.2, reduced_offense_desc_offense_4.2, offense_description_offense_5.0, reduced_offense_desc_offense_5.0, offense_description_offense_6.0, reduced_offense_desc_offense_6.0, offense_description_offense_7.0, reduced_offense_desc_offense_7.0))
| grepl("229999", paste(offense_description_offense_1.0, reduced_offense_desc_offense_1.0, offense_description_offense_1.1, reduced_offense_desc_offense_1.1, offense_description_offense_1.2, reduced_offense_desc_offense_1.2, offense_description_offense_1.3, reduced_offense_desc_offense_1.3, offense_description_offense_1.4, reduced_offense_desc_offense_1.4, offense_description_offense_1.5, reduced_offense_desc_offense_1.5, offense_description_offense_2.0, reduced_offense_desc_offense_2.0, offense_description_offense_2.1, reduced_offense_desc_offense_2.1, offense_description_offense_2.2, reduced_offense_desc_offense_2.2, offense_description_offense_2.3, reduced_offense_desc_offense_2.3, offense_description_offense_3.0, reduced_offense_desc_offense_3.0, offense_description_offense_3.1, reduced_offense_desc_offense_3.1, offense_description_offense_3.2, reduced_offense_desc_offense_3.2, offense_description_offense_3.3, reduced_offense_desc_offense_3.3, offense_description_offense_3.4, reduced_offense_desc_offense_3.4,offense_description_offense_4.0, reduced_offense_desc_offense_4.0, offense_description_offense_4.1, reduced_offense_desc_offense_4.1,offense_description_offense_4.2, reduced_offense_desc_offense_4.2, offense_description_offense_5.0, reduced_offense_desc_offense_5.0, offense_description_offense_6.0, reduced_offense_desc_offense_6.0, offense_description_offense_7.0, reduced_offense_desc_offense_7.0))
| grepl("499999", paste(offense_description_offense_1.0, reduced_offense_desc_offense_1.0, offense_description_offense_1.1, reduced_offense_desc_offense_1.1, offense_description_offense_1.2, reduced_offense_desc_offense_1.2, offense_description_offense_1.3, reduced_offense_desc_offense_1.3, offense_description_offense_1.4, reduced_offense_desc_offense_1.4, offense_description_offense_1.5, reduced_offense_desc_offense_1.5, offense_description_offense_2.0, reduced_offense_desc_offense_2.0, offense_description_offense_2.1, reduced_offense_desc_offense_2.1, offense_description_offense_2.2, reduced_offense_desc_offense_2.2, offense_description_offense_2.3, reduced_offense_desc_offense_2.3, offense_description_offense_3.0, reduced_offense_desc_offense_3.0, offense_description_offense_3.1, reduced_offense_desc_offense_3.1, offense_description_offense_3.2, reduced_offense_desc_offense_3.2, offense_description_offense_3.3, reduced_offense_desc_offense_3.3, offense_description_offense_3.4, reduced_offense_desc_offense_3.4,offense_description_offense_4.0, reduced_offense_desc_offense_4.0, offense_description_offense_4.1, reduced_offense_desc_offense_4.1,offense_description_offense_4.2, reduced_offense_desc_offense_4.2, offense_description_offense_5.0, reduced_offense_desc_offense_5.0, offense_description_offense_6.0, reduced_offense_desc_offense_6.0, offense_description_offense_7.0, reduced_offense_desc_offense_7.0))
| grepl("90901", paste(offense_description_offense_1.0, reduced_offense_desc_offense_1.0, offense_description_offense_1.1, reduced_offense_desc_offense_1.1, offense_description_offense_1.2, reduced_offense_desc_offense_1.2, offense_description_offense_1.3, reduced_offense_desc_offense_1.3, offense_description_offense_1.4, reduced_offense_desc_offense_1.4, offense_description_offense_1.5, reduced_offense_desc_offense_1.5, offense_description_offense_2.0, reduced_offense_desc_offense_2.0, offense_description_offense_2.1, reduced_offense_desc_offense_2.1, offense_description_offense_2.2, reduced_offense_desc_offense_2.2, offense_description_offense_2.3, reduced_offense_desc_offense_2.3, offense_description_offense_3.0, reduced_offense_desc_offense_3.0, offense_description_offense_3.1, reduced_offense_desc_offense_3.1, offense_description_offense_3.2, reduced_offense_desc_offense_3.2, offense_description_offense_3.3, reduced_offense_desc_offense_3.3, offense_description_offense_3.4, reduced_offense_desc_offense_3.4,offense_description_offense_4.0, reduced_offense_desc_offense_4.0, offense_description_offense_4.1, reduced_offense_desc_offense_4.1,offense_description_offense_4.2, reduced_offense_desc_offense_4.2, offense_description_offense_5.0, reduced_offense_desc_offense_5.0, offense_description_offense_6.0, reduced_offense_desc_offense_6.0, offense_description_offense_7.0, reduced_offense_desc_offense_7.0))
| grepl("90902", paste(offense_description_offense_1.0, reduced_offense_desc_offense_1.0, offense_description_offense_1.1, reduced_offense_desc_offense_1.1, offense_description_offense_1.2, reduced_offense_desc_offense_1.2, offense_description_offense_1.3, reduced_offense_desc_offense_1.3, offense_description_offense_1.4, reduced_offense_desc_offense_1.4, offense_description_offense_1.5, reduced_offense_desc_offense_1.5, offense_description_offense_2.0, reduced_offense_desc_offense_2.0, offense_description_offense_2.1, reduced_offense_desc_offense_2.1, offense_description_offense_2.2, reduced_offense_desc_offense_2.2, offense_description_offense_2.3, reduced_offense_desc_offense_2.3, offense_description_offense_3.0, reduced_offense_desc_offense_3.0, offense_description_offense_3.1, reduced_offense_desc_offense_3.1, offense_description_offense_3.2, reduced_offense_desc_offense_3.2, offense_description_offense_3.3, reduced_offense_desc_offense_3.3, offense_description_offense_3.4, reduced_offense_desc_offense_3.4,offense_description_offense_4.0, reduced_offense_desc_offense_4.0, offense_description_offense_4.1, reduced_offense_desc_offense_4.1,offense_description_offense_4.2, reduced_offense_desc_offense_4.2, offense_description_offense_5.0, reduced_offense_desc_offense_5.0, offense_description_offense_6.0, reduced_offense_desc_offense_6.0, offense_description_offense_7.0, reduced_offense_desc_offense_7.0)))
emilie_pull
540412 – Driving While Intoxicated -3D/M 540405 – Driving While Intoxicated 3rd 540418 – DWI w/ prev intox mansl convict 229999 – DWI Subsequent Offense 499999 – DWI – 3rd Accident 90901 – Intoxication Manslaughter 90902 – Intox Manslaughter Publ Serv
emilie_pull_each_offense <- clean_dwi %>%
filter(offense_code == "540412" | offense_code == "540405" | offense_code == "540418" | offense_code == "229999" | offense_code == "499999" | offense_code == "90901" | offense_code == "90902" | reduced_offense_code == "540412" | reduced_offense_code == "540405" | reduced_offense_code == "540418" | reduced_offense_code == "229999" | reduced_offense_code == "499999" | reduced_offense_code == "90901" |reduced_offense_code == "90902") %>%
left_join(individual, by = "sid") %>%
rename(number_of_dwi_related_offenses = n) %>%
select(sid, full_name, sex, race, birthdate, number_of_dwi_related_offenses, everything())
emilie_pull_each_offense
#write_csv(emilie_pull, "my_exports/April17/emilie_pull.csv")
#write_csv(emilie_pull_each_offense, "Data/my_exports/May13/may11_emiliepull_by_offense.csv")